In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objs as go
from ml_core.utils import Normalization, DataProcess
from ml_core.regression import GradientDescent, OrdinaryLeastSquares, StochasticGradientDescent
from sklearn.model_selection import train_test_split
from ml_core.metrics import ErrorMetrics
In [2]:
# Load the 1-D dataset. The CSV is read without a header row, so the two
# columns are named 'x' and 'y' explicitly.
df = pd.read_csv('data/artificial1d.csv', header=None, names=['x', 'y'])
In [3]:
# Inspect the column dtypes of the loaded frame.
df.dtypes
Out[3]:
x    float64
y    float64
dtype: object

Preparando os Dados¶

In [4]:
# Normalize the feature column with the project's z-score helper; the second
# return value is not needed for the feature and is discarded.
normalized_x, _ = Normalization.z_score_normalization(df['x'].to_numpy())
In [5]:
# Normalize the target and keep the second return value: it is later called
# on model predictions before plotting (presumably the inverse transform back
# to the original scale -- confirm in ml_core.utils.Normalization).
normalized_y, denormalized_y = Normalization.z_score_normalization(df['y'].to_numpy())
In [6]:
# Design matrix used by every 1-D model below.
# NOTE(review): presumably add_ones_column adds a bias column of ones -- confirm in ml_core.
X_ones = DataProcess.add_ones_column(normalized_x)

# Target passed to every fit() below.
# NOTE(review): presumably reshaped into a 2-D column vector -- confirm in ml_core.
y = DataProcess.reshape_vector(normalized_y)

Funções de Visualização¶

In [7]:
def show_rmse_curve(history, title='RMSE Curve'):
    """Plot the RMSE recorded at each training step.

    Parameters
    ----------
    history : iterable of dict
        Training history; every entry must provide the key ``'rmse_error'``.
        The iterable is consumed exactly once, so generators are accepted too.
    title : str, optional
        Title of the figure.

    Returns
    -------
    The figure returned by ``px.line``, with the step index on the x axis
    and the RMSE on the y axis.
    """
    rmse_values = [step['rmse_error'] for step in history]

    # Derive the step index from the collected values rather than iterating
    # `history` a second time: a one-shot iterable would otherwise yield an
    # empty x axis that does not match the y values.
    steps = list(range(len(rmse_values)))

    fig = px.line(x=steps, y=rmse_values, labels={'x': 'steps', 'y': 'RMSE'}, title=title)

    return fig
In [8]:
def show_final_result(history,
                      denormalized_function=denormalized_y,
                      title='Model Result',
                      x=df['x'],
                      y=df['y'],
                      test_matrix=X_ones):
    """Plot the observed data together with the final model's predictions.

    Parameters
    ----------
    history : list or model
        Either a training history (a list whose last entry is a dict holding
        the fitted model under the key ``'model'``) or a model object that
        exposes ``predict``.
    denormalized_function : callable, optional
        Applied to the raw predictions before plotting.
    title : str, optional
        Title of the figure.
    x, y : array-like, optional
        Observed points drawn as the scatter layer; ``x`` is also used as the
        x axis of the prediction trace.
    test_matrix : array-like, optional
        Matrix handed to ``predict``.

    Returns
    -------
    The scatter figure with one extra trace holding the predictions.

    Notes
    -----
    The default values are evaluated once, when this cell is executed, so
    they stay bound to the ``df``, ``denormalized_y`` and ``X_ones`` objects
    that existed at that moment. Re-run this cell after changing any of them.
    """
    if isinstance(history, list):
        final_model = history[-1]['model']
    else:
        final_model = history

    # Use the function supplied by the caller. The previous version always
    # called the global `denormalized_y`, silently ignoring this parameter.
    preds = denormalized_function(final_model.predict(test_matrix))

    fig = px.scatter(x=x, y=y, title=title)

    # The predictions are indexed as a 2-D array; the first column is plotted.
    fig.add_trace(
        go.Scatter(x=x, y=preds[:, 0])
    )

    return fig
    
    

GD¶

In [9]:
# Gradient-descent regressor: 1000 epochs, L2 coefficient 0.001, with history
# tracking requested. The keyword spellings (`ephocs`, `l2_regulazation`) are
# the ones this call uses; presumably they match ml_core's parameter names.
gd = GradientDescent(ephocs=1000, with_history_predictions=True, l2_regulazation=0.001)
In [10]:
# Fit on the normalized 1-D data. The result is passed to show_rmse_curve and
# show_final_result, which read 'rmse_error' from every entry and 'model'
# from the last one.
history_gd = gd.fit(X_ones, y)
  0%|          | 0/1000 [00:00<?, ?it/s]
In [11]:
# RMSE per training step for gradient descent.
show_rmse_curve(history_gd)
In [12]:
# Observed data vs. predictions of the last model in the GD history.
show_final_result(history_gd)

OLS¶

In [13]:
# Ordinary-least-squares regressor. This same instance is fitted again in the
# polynomial section further down.
ols = OrdinaryLeastSquares(ephocs=1000, with_history_predictions=True)
In [14]:
# Fit OLS on the normalized 1-D data; the result is handed to
# show_final_result (presumably a fitted model rather than a history list).
model_ols = ols.fit(X_ones, y)
In [15]:
# Observed data vs. OLS predictions.
show_final_result(model_ols)

GDE¶

In [16]:
# Stochastic gradient-descent regressor: 100 epochs, L2 coefficient 0.001,
# with history tracking requested.
# NOTE(review): no random seed is passed here; if the implementation shuffles
# samples, results may differ between runs -- confirm in ml_core.
gde = StochasticGradientDescent(ephocs=100, with_history_predictions=True, l2_regulazation=0.001)
In [17]:
# Fit SGD on the normalized 1-D data; the result is consumed by
# show_rmse_curve and show_final_result in the next cells.
history_gde = gde.fit(X_ones, y)
  0%|          | 0/100 [00:00<?, ?it/s]
In [18]:
# RMSE per training step for stochastic gradient descent.
show_rmse_curve(history_gde)
In [19]:
# Observed data vs. predictions of the last model in the SGD history.
show_final_result(history_gde)

Transformação Polinomial¶

In [20]:
# Polynomial expansion of the design matrix up to order 11.
# NOTE(review): the input here is X_ones (built with add_ones_column), whereas
# my_greate_test expands matrices without that column and passes
# with_bias=True -- verify that this call does not produce redundant constant
# columns.
X_pow = DataProcess.generate_polynomial_order(X_ones, 11)
In [21]:
# Fit the polynomial model, reusing the `ols` instance created in the OLS
# section.
# NOTE(review): if fit() returns the estimator itself, model_ols and
# model_p_ols would be the same object and re-running the earlier OLS plot
# would use the polynomial weights -- confirm, or use a fresh instance.
model_p_ols = ols.fit(X_pow, y)
In [22]:
# Observed data vs. polynomial-model predictions; the expanded matrix must be
# passed explicitly because the default test_matrix is X_ones.
show_final_result(model_p_ols, test_matrix=X_pow)

Análise de Desempenho¶

Preparando os Dados¶

In [23]:
# Load the California dataset; the CSV is read without a header row, so
# columns get integer labels.
df_p = pd.read_csv('data/california.csv', header=None)
In [24]:
# Work on the raw NumPy array so features and target can be sliced by position.
arr = df_p.to_numpy()

Separando os Dados¶

In [25]:
# Features: the first eight columns (positions 0-7).
X_p = arr[:, :8]
In [26]:
# Target: the column at position 8, passed through the project's reshape
# helper (presumably into a 2-D column vector -- TODO confirm).
y_p = DataProcess.reshape_vector(arr[:, 8])

Normalizando os Dados¶

In [27]:
# Normalize every feature column independently, writing the result into a
# preallocated array with the same shape and dtype as X_p.
X_norm = np.empty_like(X_p)

for col_idx in range(X_p.shape[1]):
    # Only the normalized values are kept; the second return value is discarded.
    X_norm[:, col_idx], _ = Normalization.z_score_normalization(X_p[:, col_idx])
In [28]:
# Normalize the target and keep the second return value; my_greate_test calls
# it on both targets and predictions before computing RMSE (presumably the
# inverse transform back to the original scale -- confirm in ml_core).
y_norm, denormalized_y_p = Normalization.z_score_normalization(y_p)

Bateria de Testes¶

In [29]:
def my_greate_test(max_order, l2_reg=0):
    """Fit OLS polynomial models of increasing order and plot train/test RMSE.

    The module-level ``X_norm`` / ``y_norm`` arrays are split once into train
    and test partitions (80/20, ``random_state=42``). For every order, both
    partitions are expanded with ``DataProcess.generate_polynomial_order``,
    a model is fitted on the training part, and the RMSE is computed on
    values passed through ``denormalized_y_p``.

    Parameters
    ----------
    max_order : int
        Exclusive upper bound of the tested orders: orders
        ``1 .. max_order - 1`` are evaluated.
    l2_reg : float, optional
        Value forwarded to ``OrdinaryLeastSquares`` as ``l2_regulazation``.

    Returns
    -------
    plotly.graph_objs.Figure
        Figure with one trace for the training RMSE and one for the test
        RMSE, both indexed by polynomial order.
    """
    orders = list(range(1, max_order))

    ols = OrdinaryLeastSquares(l2_regulazation=l2_reg)

    X_trn, X_tst, y_trn, y_tst = train_test_split(X_norm, y_norm, test_size=0.2, random_state=42)

    # The targets do not depend on the polynomial order, so they are
    # denormalized once instead of on every iteration (assumes
    # denormalized_y_p has no side effects).
    y_trn_denorm = denormalized_y_p(y_trn)
    y_tst_denorm = denormalized_y_p(y_tst)

    rmse_erros_train = []
    rmse_erros_test = []

    for order in orders:

        X_train_pow = DataProcess.generate_polynomial_order(X_trn, order, with_bias=True)
        X_tst_pow = DataProcess.generate_polynomial_order(X_tst, order, with_bias=True)

        model = ols.fit(X_train_pow, y_trn)

        preds_train = model.predict(X_train_pow)
        preds_test = model.predict(X_tst_pow)

        rmse_erros_train.append(
            ErrorMetrics.rmse(y_trn_denorm, denormalized_y_p(preds_train))
        )
        rmse_erros_test.append(
            ErrorMetrics.rmse(y_tst_denorm, denormalized_y_p(preds_test))
        )

    plots = [
        go.Scatter(x=orders, y=rmse_erros_train, name='Train RMSE'),
        go.Scatter(x=orders, y=rmse_erros_test, name='Test RMSE')
    ]

    fig = go.Figure(data=plots)

    # Label the figure so it stands alone and runs with different l2_reg
    # values can be told apart.
    fig.update_layout(
        title=f'Train vs. test RMSE by polynomial order (l2_reg={l2_reg})',
        xaxis_title='Polynomial order',
        yaxis_title='RMSE',
    )

    return fig
        
    
In [30]:
# Run without L2 regularization (l2_reg defaults to 0); range(1, 11) inside
# the function means orders 1 through 10 are evaluated.
fig = my_greate_test(11)
In [31]:
# Display the train/test RMSE figure of the unregularized run.
fig
In [32]:
# Same experiment with an L2 coefficient of 0.001. This rebinds `fig`, so the
# unregularized figure is no longer reachable by name.
fig = my_greate_test(11, l2_reg=0.001)
In [33]:
# Display the train/test RMSE figure of the regularized run.
fig
In [ ]: